{
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "CtL5xVVyWuUK",
        "colab_type": "text"
      },
      "source": [
        "# Loading the Unsplash Research dataset in Pandas dataframes\n",
        "\n",
        "This notebook is an example of how to load the Unsplash Research dataset into Pandas dataframes for analysis.\n"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "xTuZ15NVXZ-D",
        "colab_type": "text"
      },
      "source": [
        "## Loading libraries"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "dnqh7YF2XRob",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "import glob\n",
        "import os\n",
        "\n",
        "import numpy as np\n",
        "import pandas as pd"
      ],
      "execution_count": 11,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "pOPP3a_MW8V0",
        "colab_type": "text"
      },
      "source": [
        "## Loading the datasets in Pandas\n",
        "\n",
        "Make sure that the `path` variable points to the folder containing the dataset's TSV files."
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "1C4TnBTrUCnR",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "# Load every table of the dataset into `datasets`, keyed by table name.\n",
        "path = './'\n",
        "documents = ['photos', 'keywords', 'collections', 'conversions', 'colors']\n",
        "datasets = {}\n",
        "\n",
        "for doc in documents:\n",
        "  # Match every file whose name starts with '<doc>.tsv', so a table that is\n",
        "  # split across several part files is loaded completely. glob returns matches\n",
        "  # in arbitrary order; sorting keeps the combined row order reproducible.\n",
        "  files = sorted(glob.glob(os.path.join(path, doc + \".tsv*\")))\n",
        "  if not files:\n",
        "    # pd.concat([]) would otherwise fail with an unhelpful\n",
        "    # \"No objects to concatenate\" error.\n",
        "    raise FileNotFoundError(\n",
        "        f\"No files matching '{doc}.tsv*' found in '{path}'. \"\n",
        "        \"Check that `path` points to the dataset folder.\"\n",
        "    )\n",
        "\n",
        "  # Read each part as a tab-separated file with a header row, then stack the\n",
        "  # parts into one frame with a fresh 0..n-1 index.\n",
        "  subsets = [pd.read_csv(filename, sep='\\t', header=0) for filename in files]\n",
        "  datasets[doc] = pd.concat(subsets, axis=0, ignore_index=True)"
      ],
      "execution_count": 15,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "hlJ3gl4BXiIq",
        "colab_type": "text"
      },
      "source": [
        "## Exploring the datasets\n",
        "\n",
        "Here are the first five rows from each dataset, as an example.\n",
        "\n",
        "Enjoy the exploration!"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "Oqsl7d1kVfeW",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "# Preview the first rows of the photos table.\n",
        "datasets[\"photos\"].head()"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "-r3bUIlwXnJV",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "# Preview the first rows of the keywords table.\n",
        "datasets[\"keywords\"].head()"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "SIVStDb1XpTL",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "# Preview the first rows of the collections table.\n",
        "datasets[\"collections\"].head()"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "uDCjYhTeXq8v",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "# Preview the first rows of the conversions table.\n",
        "datasets[\"conversions\"].head()"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "# Preview the first rows of the colors table.\n",
        "datasets[\"colors\"].head()"
      ]
    }
  ],
  "metadata": {
    "colab": {
      "name": "Loading Unsplash Research datasets in Pandas",
      "provenance": [],
      "collapsed_sections": []
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 0
}